# ===================
# REVIEW SCORES
# ===================

# Calculate reviewer scores
# =======================================================================================
# Return the most frequent value of `v`.
# Ties are resolved in favour of the value that appears first in `v`
# (unique() keeps first-occurrence order and which.max() returns the first
# maximum). NA is counted like any other value.
getmode <- function(v) {
  candidates <- unique(v)
  # Position of each element of `v` within the distinct values
  positions <- match(v, candidates)
  # Number of occurrences of each distinct value
  frequency <- tabulate(positions)
  candidates[which.max(frequency)]
}

# Ranks a single review can take
vec <- as.numeric(1:4)

# Conditions
# Text fragments of R comparison expressions, one per review position V1..V9,
# written against a data frame `tmp` and a row index `i`. Joining the first m
# fragments gives a test for "every position is <= (better) / >= (worse) the
# value in row i". The first fragment has no leading "&".
conditions_better <- c(
  "tmp$V1<= tmp$V1[i]",
  sprintf("& tmp$V%d <=  tmp$V%d[i]", 2:9, 2:9)
)
conditions_worse <- c(
  "tmp$V1>= tmp$V1[i]",
  sprintf("& tmp$V%d >=  tmp$V%d[i]", 2:9, 2:9)
)


# Baseline score table for manuscripts with a single review.
# For a lone review with rank r, `better` is the number of lower ranks (r - 1)
# and `worse` the number of higher ranks (4 - r); no case is unclear.
df_rev_score <- data.frame(
  comb = as.character(1:4),
  better = c(0, 1, 2, 3),
  worse = c(3, 2, 1, 0),
  unclear = rep(0, 4),
  check.names = FALSE
)

# calculate scores
# ===================
# The optimistic score leaves unclear cases out of the denominator, the
# pessimistic score includes them. With one review there is no disagreement.
df_rev_score <- mutate(
  df_rev_score,
  rev_score_opt = worse / (better + worse),
  rev_score_pess = worse / (better + worse + unclear),
  n_review = 1,
  disagree_score = 0
)

# For every combination (row of the numeric matrix `ranks`), count how many
# OTHER combinations satisfy `cmp` against it in every review position.
# `cmp` is a vectorised comparison such as `<=` or `>=`.
count_dominance <- function(ranks, cmp) {
  ranks_t <- t(ranks) # one combination per column, one row per position
  n_pos <- nrow(ranks_t)
  vapply(
    seq_len(ncol(ranks_t)),
    function(i) {
      # ranks_t[, i] has one entry per position and is recycled down each
      # column, so every combination is compared position by position.
      holds_everywhere <- colSums(cmp(ranks_t, ranks_t[, i])) == n_pos
      # A combination always satisfies <= / >= against itself: drop that hit.
      sum(holds_everywhere) - 1
    },
    numeric(1)
  )
}

# Score every possible combination of m = 2..9 reviews and append the results
# to df_rev_score (same columns as the single-review baseline).
for (m in 2:9) {
  # All unordered size-m combinations, with repetition, of the ranks in `vec`;
  # as.data.frame() names the positions V1..Vm.
  tmp_main <- as.data.frame(
    CombSet(vec, m = m, repl = TRUE, ord = FALSE, as.list = FALSE)
  )
  ranks <- unname(as.matrix(tmp_main))
  n_comb <- nrow(ranks)

  # 1) REVIEW SCORE
  # better: other combinations that are <= this one in every position
  # worse:  other combinations that are >= this one in every position
  n_better <- count_dominance(ranks, `<=`)
  n_worse <- count_dominance(ranks, `>=`)

  # 2) DISAGREEMENT SCORE
  # Total absolute distance of the individual ranks from the row median;
  # row_median has one entry per row and recycles down the columns of `ranks`.
  row_median <- apply(ranks, 1, median)
  change_rev <- rowSums(abs(ranks - row_median))

  tmp <- tmp_main %>%
    unite("comb", num_range("V", 1:m), sep = "-") %>%
    mutate(
      better = n_better,
      worse = n_worse,
      # combinations that are neither uniformly better nor uniformly worse
      unclear = (n_comb - 1) - (better + worse),
      # optimistic score ignores unclear cases, pessimistic score counts them
      rev_score_opt = worse / (better + worse),
      rev_score_pess = worse / (better + worse + unclear),
      # number of reviews
      n_review = m,
      disagree_score = change_rev / n_review
    )

  # append to main data set
  df_rev_score <- bind_rows(df_rev_score, tmp)
}


# Build one row per manuscript describing its combination of recommendations.
# Manuscripts are kept only if none of their reviewer rows has a missing
# r_gender; rows with a missing gender_type3 are dropped as well.
review_sample <- reviewer_data %>%
  group_by(manuscript_id) %>%
  filter(!any(is.na(r_gender)), !is.na(gender_type3)) %>%
  ungroup() %>%
  select(manuscript_id, recommendation) %>%
  # Recode the recommendation as a rank: 1 = Accept ... 4 = Reject; any other
  # value becomes NA
  mutate(
    rec = match(
      recommendation,
      c("Accept", "Minor Revision", "Major Revision", "Reject")
    )
  ) %>%
  # Sort by rank first so the ranks inside each manuscript's combination
  # string appear in ascending order
  arrange(rec) %>%
  group_by(manuscript_id) %>%
  summarise(
    comb = paste(rec, collapse = "-"),
    rec_detail = paste0(recommendation, collapse = ";"),
    n_review = n()
  )

# Look up the scores for each manuscript's review combination; the key is the
# combination string together with the number of reviews
review_sample <- left_join(
  review_sample,
  df_rev_score,
  by = c("comb", "n_review")
)

# Attach the review information to the manuscript data. dim() is shown before
# and after, presumably to check that the join leaves the row count unchanged.
dim(manuscript_data)
manuscript_data <- left_join(
  manuscript_data,
  review_sample,
  by = "manuscript_id"
)
dim(manuscript_data)

# Add info on reviewership to manuscript data
# One row per manuscript_id x subfield, built from the distinct reviewer rows
# that have a recommendation. If any r_gender in a group is NA, the sums and
# every indicator derived from them are NA for that group.
to_merge <- reviewer_data %>%
  filter(!is.na(recommendation)) %>%
  distinct() %>%
  mutate(
    fem_r = as.numeric(r_gender == "female"),
    male_r = as.numeric(r_gender == "male")
  ) %>%
  group_by(manuscript_id, subfield) %>%
  summarise(
    one_fem = as.numeric(sum(fem_r) == 1),
    two_fem = as.numeric(sum(fem_r) == 2),
    three_more_fem = as.numeric(sum(fem_r) >= 3),
    atleastone = as.numeric(sum(fem_r) >= 1),
    atleastonemale = as.numeric(sum(male_r) >= 1),
    n_reviewer = n(),
    n_fem_rev = sum(fem_r),
    # "only female" requires at least one female reviewer. A group with
    # neither a female nor a male reviewer (or with missing gender) gets NA
    # instead of being labelled "only female".
    reviewership = case_when(
      atleastone == 1 & atleastonemale == 1 ~ "mixed",
      atleastone == 1 & atleastonemale == 0 ~ "only female",
      atleastone == 0 & atleastonemale == 1 ~ "only male",
      TRUE ~ NA_character_
    )
  ) %>%
  # summarise() only drops the last grouping level; return a plain table
  ungroup()

# NOTE(review): to_merge is keyed by manuscript_id AND subfield but joined on
# manuscript_id only. If a manuscript can have several subfields, this join
# duplicates manuscript rows -- compare the two dim() outputs to confirm.
dim(manuscript_data)
manuscript_data <- manuscript_data %>%
  left_join(to_merge, by = "manuscript_id")
dim(manuscript_data)


# Outcome variable for the first decision: 0 when first_decision contains
# "Reject", 1 otherwise; a missing first_decision stays NA
dim(manuscript_data)
manuscript_data <- manuscript_data %>%
  mutate(non_reject = as.numeric(!str_detect(first_decision, "Reject")))
dim(manuscript_data)
